package com.xiaojiezhu.spark.rdd2

import org.apache.spark.{SparkConf, SparkContext}

/**
  * Computes the per-key average of an RDD of (key, value) pairs,
  * using combineByKey to accumulate (sum, count) per key.
  */
object ScalaCombineByKey {

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local").setAppName("app")
    val sc = new SparkContext(conf)
    try {
      val rdd = sc.parallelize(List((1, 2), (1, 3), (2, 10), (2, 12), (1, 6)))
      // combineByKey arguments:
      //   createCombiner: first value seen for a key -> (sum, count) = (v, 1)
      //   mergeValue:     fold another value into a partition-local (sum, count)
      //   mergeCombiners: merge partial (sum, count) pairs across partitions
      val sumCounts = rdd.combineByKey(
        (v: Int) => (v, 1),
        (acc: (Int, Int), v: Int) => (acc._1 + v, acc._2 + 1),
        (a: (Int, Int), b: (Int, Int)) => (a._1 + b._1, a._2 + b._2)
      )
      // BUG FIX: the original used integer division (sum / count), which
      // truncates the average — e.g. key 1: (2+3+6)/3 printed 3 instead of
      // 3.666... Convert to Double before dividing.
      val averages = sumCounts.map { case (key, (sum, count)) => (key, sum.toDouble / count) }
      averages.collectAsMap().foreach(println)
    } finally {
      // Always release the SparkContext, even if the job throws.
      sc.stop()
    }
  }

}
